
*this file generates the smoothed relative Engel curves from raw data inputs

qui {
********************************************************************************
* Builds smoothed relative Engel curves market by market.
*
* Positional arguments passed to this do-file:
*    1  start       first market id of the range handled by this call
*    2  finish      last market id of the range handled by this call
*    3  Gi_spec     list of tokens; each one is looped over as DMI and is
*                   appended to the input file name and put in the output names
*    4  share_spec  prefix of the share variables to smooth (share_spec followed
*                   by an underscore and anything)
*    5  band_spec   bandwidth spec: the letter g or y followed by one or more
*                   widths. g scales the width by the range of predicted log
*                   expenditure divided by 100; y uses width divided by 100.
*    6  dump        root folder; output goes to the Engel + foldno subfolder
*    7  dropbox     root folder that holds replication_files
*    8  iteration   only used in the name of the completion marker file
*    9  foldno      suffix of the Engel output folder
*   10  bs          suffix used in input and output file names
*                   (presumably the bootstrap draw - TODO confirm with caller)
*
* Output per market n: Engel_<band>_<share>_<DMI>_<n>_bs<bs>.dta, holding the
* round 43 and round 55 curves side by side on a grid of 101 percentile points.
* Markets whose combined file already exists are skipped, so the script can be
* rerun after an interruption.
*
* NOTE: the loop is not yet split up (original to-do kept).
* NOTE: renvars is a user-written command and must be installed.
********************************************************************************

local start = `1'
local finish = `2'
local Gi_spec "`3'"
local share_spec "`4'"
local band_spec "`5'"
global dump "`6'"
global dropbox "`7'"
local iteration = `8'
local foldno = "`9'"
local bs = `10'

set more off
set matsize 11000


foreach DMI in   `Gi_spec'  {

	cd "${dump}/Engel`foldno'/"

	****************************************************************************
	*Step 2:
	*Pull in one market-round at a time from the household shares dataset.
	*The number of observations equals the number of households observed in
	*the market-round.
	****************************************************************************

	forval n=`start'/`finish' {
		* progress indicator, shown even though the script runs quietly
		noi di _c "`n' "

		* skip the market entirely if its combined file already exists
		capture confirm file Engel_`band_spec'_`share_spec'_`DMI'_`n'_bs`bs'.dta
		if _rc!=0 {

			foreach rnd in 43   55   {

				* skip the round if its intermediate file is left over from an earlier run
				capture confirm file Engel_`band_spec'_`share_spec'_`DMI'_`n'_`rnd'_bs`bs'.dta
				if _rc!=0 {

					use if market_id==`n' & round==`rnd' & count_hh>5 using "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55`DMI'_rural_bs`bs'.dta" , clear

					* nothing to do when the market-round has no usable households
					if _N>0 {

						* number of real households, recorded before the grid is padded
						local nhhds=_N

						********************************************************
						*Step 3:
						*Rank households by log expenditure, subtract 0.5, divide
						*by the number of households and multiply by 100. This
						*gives each household a point percentile, e.g. 38.235 for
						*the 33rd household in a market of 85.
						********************************************************
						sort logexp_cap
						gen rank_pctile=100*(_n-0.5)/_N

						********************************************************
						*Step 4:
						*Make sure there are at least 101 obs in the market-round
						*and generate a variable percentile taking values 0-100
						*on the first 101 obs. This is the evaluation grid.
						********************************************************
						if _N<101 {
							set obs 101

							*carry market level variables forward into the padded obs
							foreach var of varlist total_wt sector state43 district43 round   count_hh market_year_id market_id  {
								replace `var'=`var'[_n-1] if `var'==.
							}
						}
						gen percentile=_n-1 if _n<=101

						********************************************************
						*Step 5:
						*Run lpoly of log expenditure on the point percentile to
						*get predicted log expenditure at each grid point.
						********************************************************

						* the list allows for winsorized and non-winsorized runs;
						* only the winsorized suffix w is currently active
						foreach winsor in  "w" {

							gen bw_mpce=101/(`nhhds'-1)

							*weight here using survey weights to get right percentiles
							lpoly logexp_cap`winsor' rank_pctile  [aw=int_wt], at(percentile) gen(predict_lnmpce`winsor')  bw(bw_mpce)  degree(1) nograph

							drop bw_mpce

							****************************************************
							*Step 6:
							*Loop over the share variables, running lpoly of each
							*share on log expenditure, evaluated at the predicted
							*log expenditure of each grid point.
							****************************************************

							*first create bandwidth variables for the lpoly Engel curves

							* g specs: width times the range of predicted log expenditure over 100
							if regexm("`band_spec'","^g")==1 {

								local wid=regexr("`band_spec'","^g","")

								foreach width in    `wid'  {
									gen `winsor'bwg`width'=`width'*(predict_lnmpce`winsor'[101] - predict_lnmpce`winsor'[1])/100 if _n<=101
								}
							}

							* y specs other than y0: fixed bandwidth of width over 100
							if regexm("`band_spec'","^y")==1 & "`band_spec'"!="y0" {

								local wid=regexr("`band_spec'","^y","")

								foreach width in  `wid'  {
									gen `winsor'bwy`width'=`width'/100 if _n<=101
								}
							}

							*now calculate the lpolys, one per share variable and bandwidth
							foreach var of varlist  `share_spec'_*  {

								foreach bw of varlist `winsor'bw* {
									cap lpoly `var' logexp_cap`winsor' [aw=int_wt], at(predict_lnmpce`winsor') gen(lp_`bw'_`var') se(se_`bw'_`var') bw(`bw') degree(1) nograph
									* if the smoother fails, keep missing placeholders so that
									* every output file has the same set of variables
									if _rc!=0 {
										gen lp_`bw'_`var'=.
										gen se_`bw'_`var'=.
									}
								}
							}
						}

						********************************************************
						*Step 7:
						*Keep only the 101 grid points with the predicted log
						*expenditure, the estimated curves and SEs, and the
						*market identifiers, then save the round level file.
						********************************************************
						keep if _n<=101
						keep sector state43 district43 round count_hh market_year_id market_id total_wt predict_lnmpce*  percentile   lp_* se_*

						order market_year_id market_id sector state43 district43 round  count_hh total_wt   predict_lnmpce* percentile

						*get rid of labels for easier plotting later
						foreach var of varlist lp_* se_* {
							label var `var' ""
						}

						* prefix round specific variables so both rounds can sit in one file
						renvars round count_hh market_year_id  total_wt  predict_lnmpce*    lp_* se_*  ,pref(r`rnd')

						* File names are keyed on the loop index n, which equals market_id
						* in the loaded data, so they always match the existence checks.
						save Engel_`band_spec'_`share_spec'_`DMI'_`n'_`rnd'_bs`bs'.dta, replace
					}
				}
			}
			*end of round loop

			* Combine the two rounds when both round files exist. The names use n
			* rather than a macro set during estimation, so round files left over
			* from an interrupted run are merged and cleaned up as well.
			capture confirm file Engel_`band_spec'_`share_spec'_`DMI'_`n'_43_bs`bs'.dta
			if _rc==0 {
				capture confirm file Engel_`band_spec'_`share_spec'_`DMI'_`n'_55_bs`bs'.dta
				if _rc==0 {

					use Engel_`band_spec'_`share_spec'_`DMI'_`n'_43_bs`bs'.dta, clear
					merge 1:1 percentile using Engel_`band_spec'_`share_spec'_`DMI'_`n'_55_bs`bs'.dta, nogenerate

					save Engel_`band_spec'_`share_spec'_`DMI'_`n'_bs`bs'.dta, replace
				}
			}

			* remove the round level intermediates whether or not they were merged
			cap erase Engel_`band_spec'_`share_spec'_`DMI'_`n'_43_bs`bs'.dta
			cap erase Engel_`band_spec'_`share_spec'_`DMI'_`n'_55_bs`bs'.dta

		}
		*skip if already run

	}
	*market id loop

	* write a small marker file signalling that this call finished for this DMI
	clear
	set obs 100
	gen test=1
	save InstanceA_`band_spec'_`share_spec'_`DMI'_`iteration'_bs`bs'.dta, replace

}
*DMI

}
*end qui


* close Stata once the batch job is done
exit, clear STATA


